GLM Part4

Author

Murray Logan

Published

30/07/2023


1 Preparations

Load the necessary libraries

library(glmmTMB)       #for model fitting
library(car)           #for regression diagnostics
library(broom)         #for tidy output
library(ggfortify)     #for model diagnostics
library(DHARMa)        #for residual diagnostics
library(see)           #for plotting residuals
library(knitr)         #for kable
library(effects)       #for partial effects plots
library(ggeffects)     #for partial effects plots
library(emmeans)       #for estimating marginal means
library(modelr)        #for auxillary modelling functions
library(tidyverse)     #for data wrangling
library(lindia)        #for diagnostics of lm and glm
library(performance)   #for residuals diagnostics
library(sjPlot)        #for outputs
library(report)        #for reporting methods/results
library(easystats)     #framework for stats, modelling and visualisation
library(MASS)          #for negative binomials
library(MuMIn)         #for AICc
library(patchwork)     #for figure layouts

2 Scenario

Loyn (1987) modelled the abundance of forest birds with six predictor variables (patch area, distance to nearest patch, distance to nearest larger patch, grazing intensity, altitude and years since the patch had been isolated).

Figure 1: Regent honeyeater
Table 1: Format of loyn.csv data file
ABUND DIST LDIST AREA GRAZE ALT YR.ISOL
.. .. .. .. .. .. ..
Table 2: Description of the variables in the loyn data file
ABUND Abundance of forest birds in patch- response variable
DIST Distance to nearest patch - predictor variable
LDIST Distance to nearest larger patch - predictor variable
AREA Size of the patch - predictor variable
GRAZE Grazing intensity (1 to 5, representing light to heavy) - predictor variable
ALT Altitude - predictor variable
YR.ISOL Number of years since the patch was isolated - predictor variable

The aim of the analysis is to investigate the effects of a range of predictors on the abundance of forest birds.

3 Read in the data

loyn <- read_csv('../public/data/loyn.csv', trim_ws=TRUE)
Rows: 56 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (7): ABUND, AREA, YR.ISOL, DIST, LDIST, GRAZE, ALT

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(loyn)
Rows: 56
Columns: 7
$ ABUND   <dbl> 5.3, 2.0, 1.5, 17.1, 13.8, 14.1, 3.8, 2.2, 3.3, 3.0, 27.6, 1.8…
$ AREA    <dbl> 0.1, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.…
$ YR.ISOL <dbl> 1968, 1920, 1900, 1966, 1918, 1965, 1955, 1920, 1965, 1900, 19…
$ DIST    <dbl> 39, 234, 104, 66, 246, 234, 467, 284, 156, 311, 66, 93, 39, 40…
$ LDIST   <dbl> 39, 234, 311, 66, 246, 285, 467, 1829, 156, 571, 332, 93, 39, …
$ GRAZE   <dbl> 2, 5, 5, 3, 5, 3, 5, 5, 4, 5, 3, 5, 2, 1, 5, 5, 3, 3, 3, 2, 2,…
$ ALT     <dbl> 160, 60, 140, 160, 140, 130, 90, 60, 130, 130, 210, 160, 210, …
loyn |> glimpse()
Rows: 56
Columns: 7
$ ABUND   <dbl> 5.3, 2.0, 1.5, 17.1, 13.8, 14.1, 3.8, 2.2, 3.3, 3.0, 27.6, 1.8…
$ AREA    <dbl> 0.1, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.…
$ YR.ISOL <dbl> 1968, 1920, 1900, 1966, 1918, 1965, 1955, 1920, 1965, 1900, 19…
$ DIST    <dbl> 39, 234, 104, 66, 246, 234, 467, 284, 156, 311, 66, 93, 39, 40…
$ LDIST   <dbl> 39, 234, 311, 66, 246, 285, 467, 1829, 156, 571, 332, 93, 39, …
$ GRAZE   <dbl> 2, 5, 5, 3, 5, 3, 5, 5, 4, 5, 3, 5, 2, 1, 5, 5, 3, 3, 3, 2, 2,…
$ ALT     <dbl> 160, 60, 140, 160, 140, 130, 90, 60, 130, 130, 210, 160, 210, …
## Explore the first 6 rows of the data
loyn |> head()
# A tibble: 6 × 7
  ABUND  AREA YR.ISOL  DIST LDIST GRAZE   ALT
  <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>
1   5.3   0.1    1968    39    39     2   160
2   2     0.5    1920   234   234     5    60
3   1.5   0.5    1900   104   311     5   140
4  17.1   1      1966    66    66     3   160
5  13.8   1      1918   246   246     5   140
6  14.1   1      1965   234   285     3   130
loyn |> str()
spc_tbl_ [56 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ ABUND  : num [1:56] 5.3 2 1.5 17.1 13.8 14.1 3.8 2.2 3.3 3 ...
 $ AREA   : num [1:56] 0.1 0.5 0.5 1 1 1 1 1 1 1 ...
 $ YR.ISOL: num [1:56] 1968 1920 1900 1966 1918 ...
 $ DIST   : num [1:56] 39 234 104 66 246 234 467 284 156 311 ...
 $ LDIST  : num [1:56] 39 234 311 66 246 ...
 $ GRAZE  : num [1:56] 2 5 5 3 5 3 5 5 4 5 ...
 $ ALT    : num [1:56] 160 60 140 160 140 130 90 60 130 130 ...
 - attr(*, "spec")=
  .. cols(
  ..   ABUND = col_double(),
  ..   AREA = col_double(),
  ..   YR.ISOL = col_double(),
  ..   DIST = col_double(),
  ..   LDIST = col_double(),
  ..   GRAZE = col_double(),
  ..   ALT = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 
loyn |> datawizard::data_codebook()
loyn (56 rows and 7 variables, 7 shown)

ID | Name    | Type    | Missings |       Values |          N
---+---------+---------+----------+--------------+-----------
1  | ABUND   | numeric | 0 (0.0%) |  [1.5, 39.6] |         56
---+---------+---------+----------+--------------+-----------
2  | AREA    | numeric | 0 (0.0%) |  [0.1, 1771] |         56
---+---------+---------+----------+--------------+-----------
3  | YR.ISOL | numeric | 0 (0.0%) | [1890, 1976] |         56
---+---------+---------+----------+--------------+-----------
4  | DIST    | numeric | 0 (0.0%) |   [26, 1427] |         56
---+---------+---------+----------+--------------+-----------
5  | LDIST   | numeric | 0 (0.0%) |   [26, 4426] |         56
---+---------+---------+----------+--------------+-----------
6  | GRAZE   | numeric | 0 (0.0%) |            1 | 13 (23.2%)
   |         |         |          |            2 |  8 (14.3%)
   |         |         |          |            3 | 15 (26.8%)
   |         |         |          |            4 |  7 (12.5%)
   |         |         |          |            5 | 13 (23.2%)
---+---------+---------+----------+--------------+-----------
7  | ALT     | numeric | 0 (0.0%) |    [60, 260] |         56
-------------------------------------------------------------

4 Exploratory data analysis

A scatterplot matrix plots each variable against each other. It is useful to provide boxplots in the diagonals to help explore the distributions.

scatterplotMatrix(~ABUND+DIST+LDIST+AREA+GRAZE+ALT+YR.ISOL, data=loyn,
                  diagonal = list(method='boxplot'))

Conclusions:

  • on the top row, ABUND is on the y-axis of each plot
  • the second plot from the top left has DIST on the x-axis
  • it is clear from looking at the boxplots on the diagonals that some of the potential predictors (DIST, LDIST and AREA) are not normally distributed.
  • it might be worth log-transforming these variables. Not only might this help normalise those predictors, it might also improve the linearity between the response and those predictors.
  • we will reserve all other judgements of assumptions until we explore these log-transformations
scatterplotMatrix(~ABUND+log(DIST)+log(LDIST)+log(AREA)+GRAZE+ALT+YR.ISOL, data=loyn,
                  diagonal = list(method='boxplot'))

Conclusions:

  • normality now seems to be satisfied
  • there is no evidence of real non-linearity in the relationships depicted in plots along the top row (that relate the response to the various predictors)
  • it does appear that some of the predictors might be correlated to one another (DIST and LDIST in particular). It may be necessary for use to chose between these two measures of between-patch distance.

The GRAZE predictor represents grazing intensity. Note, this was essentially a categorical variable with levels 1 (no grazing) through to 5 (intense grazing). Although we could attempt to model this as a continuous predictor, such an approach would assume that this is a linear scale in which the interval between each successive level is the same. That is, the difference between level 1 and 2 is the same as the difference between 4 and 5 etc. Since, these were categories, such spacing is not guaranteed. It might therefore be better to model this variable as a categorical variable.

Finally, even in the absence of issues relating to the range of each predictor within each level of GRAZE, it might still be advantageous to centre each predictor. Centering will provide computational advantages and also ensure that the intercept has a more useful meaning.

loyn <- loyn |> mutate(fGRAZE=factor(GRAZE))

As the model now includes both continuous and categorical predictors, unless we include interactions between the categorical variable and each of the continuous predictors, we must also assume that partial slopes of each predictor is the same for each level of the categorical predictor (homogeneity of slopes).

We will explore this with just two of the predictors (DIST and AREA - both on log axes)

p1=ggplot(loyn, aes(y=ABUND, x=DIST, color=fGRAZE)) +
    geom_smooth(method='lm') +
  scale_x_log10()

p2=ggplot(loyn, aes(y=ABUND, x=AREA, color=fGRAZE)) +
    geom_smooth(method='lm') +
  scale_x_log10()

p1 + p2

Conclusions:

  • homogeneity of slopes seems to be satisfied for DIST, but possibly not for AREA.
  • hence, the rate of change associated with AREA for the first GRAZE level might not be a good representation of the rate of change for other GRAZE levels.
  • also of some concern is that the range of AREA is not the same for each GRAZE level - this can also lead to problems

One thing we could do to address the different ranges is scale (standardise) or re-scale AREA separately within each GRAZE level.

## might need to consider scaling separtely within each group
loyn <- loyn |>
  group_by(fGRAZE) |>
  mutate(rslAREA = scales::rescale(log(AREA)),
         slAREA = scale(log(AREA))) |>
  ungroup()

ggplot(loyn, aes(y=ABUND, x=slAREA, color=fGRAZE)) +
    geom_smooth(method='lm') #+

ggplot(loyn, aes(y=ABUND, x=rslAREA, color=fGRAZE)) +
    geom_smooth(method='lm') #+

For the current example, we will treat the continuous predictors as if they have satisfied this assumption.

5 Fit the model

Model formula:

\[ log(y_i) = \mathcal{N}(\mu_i, \sigma^2)\\ \mu_i = \beta_0 + \beta_1 X_1 + ... \]

where \(\boldsymbol{\beta}\) is a vector of effects parameters and \(\bf{X}\) is a model matrix representing the additive effects of the scaled versions of distance (ln), distance to the nearest large patch (ln), patch area (ln), grazing intensity, year of isolation and altitude on the abundance of forest birds.

## loyn_glm <- glm(log(ABUND)~scale(log(DIST), scale=FALSE) + scale(log(LDIST), scale=FALSE) +
##                 scale(log(AREA), scale=FALSE)+
##                 fGRAZE + scale(ALT, scale=FALSE) + scale(YR.ISOL, scale=FALSE),
##               data=loyn, family=gaussian())
loyn_glm <- glm(log(ABUND) ~ center(log(DIST)) + center(log(LDIST)) +
                center(log(AREA))+
                fGRAZE + center(ALT) + center(YR.ISOL),
              data=loyn, family=gaussian())

Model formula: \[ y_i \sim{} \mathcal{N}(\mu_i, \sigma^2)\\ log(\mu_i) = \boldsymbol{\beta} \bf{X_i} \]

where \(\boldsymbol{\beta}\) is a vector of effects parameters and \(\bf{X}\) is a model matrix representing the additive effects of the scaled versions of distance (ln), distance to the nearest large patch (ln), patch area (ln), grazing intensity, year of isolation and altitude on the abundance of forest birds.

To centre a predictor in R, we can use the scale function. This function is able to centre and scale a vector, or only centre a vector (by indicating that it should not scale). In addition to Centering (and/or scaling if instructed), this function will also store the original mean (and standard deviation) of the original predictor as attributes, thereby facilitating back transformations.

## loyn_glm1 <- glm(ABUND~scale(log(DIST), scale=FALSE) + scale(log(LDIST), scale=FALSE) +
##                   scale(log(AREA), scale=FALSE)+
##                   fGRAZE + scale(ALT, scale=FALSE) + scale(YR.ISOL, scale=FALSE),
##               data=loyn, family=gaussian(link='log'))
loyn_glm1 <- glm(ABUND ~ center(log(DIST)) + center(log(LDIST)) +
                center(log(AREA))+
                fGRAZE + center(ALT) + center(YR.ISOL),
              data=loyn, family=gaussian(link = "log"))

Model formula: \[ y_i = \mathcal{Gamma}(\mu_i, \sigma^2)\\ log(\mu_i) = \beta_0 + \beta_1 X_1 + ... \]

where \(\boldsymbol{\beta}\) is a vector of effects parameters and \(\bf{X}\) is a model matrix representing the additive effects of the scaled versions of distance (ln), distance to the nearest large patch (ln), patch area (ln), grazing intensity, year of isolation and altitude on the abundance of forest birds.

## loyn_glm2 <- glm(ABUND~scale(log(DIST), scale=FALSE) + scale(log(LDIST), scale=FALSE) +
##                    scale(log(AREA), scale=FALSE)+
##                    fGRAZE + scale(ALT, scale=FALSE) + scale(YR.ISOL, scale=FALSE),
##                data=loyn, family=Gamma(link='log'))
loyn_glm2 <- glm(ABUND ~ center(log(DIST)) + center(log(LDIST)) +
                center(log(AREA))+
                fGRAZE + center(ALT) + center(YR.ISOL),
              data=loyn, family=Gamma(link = "log"))

6 Model validation

7 Partial plots

## Effects
## loyn_glm |> plot_model(type='eff', show.data=TRUE, dot.size=0.5) |> plot_grid()
## Predictions
loyn_glm |> plot_model(type='pred', show.data=TRUE, dot.size=0.5) |> plot_grid()

## We can sort of do part of the backtransforms - but only partly..
plot_grid(list(
    loyn_glm |> plot_model(type='eff',  terms='DIST') + scale_x_log10(),
    loyn_glm |> plot_model(type='eff',  terms='LDIST') + scale_x_log10(),
    loyn_glm |> plot_model(type='eff',  terms='AREA') + scale_x_log10(),
    loyn_glm |> plot_model(type='eff',  terms='fGRAZE'),
    loyn_glm |> plot_model(type='eff',  terms='ALT'),
    loyn_glm |> plot_model(type='eff',  terms='YR.ISOL')  
))

loyn_glm |> allEffects(residuals=TRUE) |> plot(type='response')

loyn_glm |> ggpredict() |>
  plot(add.data=TRUE, facet=TRUE, jitter=FALSE)

loyn_glm |> ggemmeans(~AREA|fGRAZE) |>
    plot(add.data=TRUE, jitter=FALSE)

loyn_glm |> ggemmeans(~AREA|fGRAZE) |>
  plot(add.data=TRUE, jitter=FALSE) +
  scale_x_log10() +
  scale_y_log10()

## unfortunately, it is not currently possible to influence the
## prediction grid so we are stuck with awful looking figures.
## Predictions
loyn_glm1 |> plot_model(type='pred', show.data=TRUE, dot.size=0.5) |> plot_grid()

## We can sort of do part of the backtransforms - but only partly..
plot_grid(list(
    loyn_glm1 |> plot_model(type='eff',  terms='DIST') + scale_x_log10(),
    loyn_glm1 |> plot_model(type='eff',  terms='LDIST') + scale_x_log10(),
    loyn_glm1 |> plot_model(type='eff',  terms='AREA') + scale_x_log10(),
    loyn_glm1 |> plot_model(type='eff',  terms='fGRAZE'),
    loyn_glm1 |> plot_model(type='eff',  terms='ALT'),
    loyn_glm1 |> plot_model(type='eff',  terms='YR.ISOL')  
))

loyn_glm1 |> allEffects(residuals=TRUE) |> plot(type='response')

loyn_glm1 |> ggpredict() |>
  plot(add.data=TRUE, facet=TRUE, jitter=FALSE)

loyn_glm1 |> ggpredict() |>
  plot(add.data=TRUE, facet=TRUE, jitter=FALSE)

loyn_glm1 |> ggemmeans(~AREA|fGRAZE) |>
    plot(add.data=TRUE, jitter=FALSE) 

loyn_glm1 |> ggemmeans(~AREA|fGRAZE) |>
    plot(add.data=TRUE, jitter=FALSE) +
    scale_x_log10()

## unfortunately, it is not currently possible to influence the
## prediction grid so we are stuck with awful looking figures.
## Predictions
loyn_glm2 |> plot_model(type='pred', show.data=TRUE, dot.size=0.5) |> plot_grid()

## We can sort of do part of the backtransforms - but only partly..
plot_grid(list(
    loyn_glm2 |> plot_model(type='eff',  terms='DIST') + scale_x_log10(),
    loyn_glm2 |> plot_model(type='eff',  terms='LDIST') + scale_x_log10(),
    loyn_glm2 |> plot_model(type='eff',  terms='AREA') + scale_x_log10(),
    loyn_glm2 |> plot_model(type='eff',  terms='fGRAZE'),
    loyn_glm2 |> plot_model(type='eff',  terms='ALT'),
    loyn_glm2 |> plot_model(type='eff',  terms='YR.ISOL')  
))

loyn_glm2 |> allEffects(residuals=TRUE) |> plot(type='response')

loyn_glm2 |> ggpredict() |>
  plot(add.data=TRUE, facet=TRUE, jitter=FALSE)

loyn_glm2 |> ggemmeans(~AREA|fGRAZE) |>
    plot(add.data=TRUE, jitter=FALSE)

loyn_glm2 |> ggemmeans(~AREA|fGRAZE) |>
    plot(add.data=TRUE, jitter=FALSE) +
    scale_x_log10() +
    scale_x_log10()

## unfortunately, it is not currently possible to influence the
## prediction grid so we are stuck with awful looking figures.

8 Caterpillar plot

loyn_glm |> plot_model(type='est')

loyn_glm |> plot_model(type='est', transform='exp', show.values=TRUE)

loyn_glm |>
  parameters() |> 
  plot()

loyn_glm |>
  parameters(exponentiate = TRUE) |> 
  plot()

loyn_glm |>
  parameters(exponentiate = TRUE) |> 
  plot(show_labels =  TRUE) +
  scale_x_continuous(trans =  "log2", limits =  c(0.1,6))

loyn_glm1 |> plot_model(type='est')

loyn_glm1 |> plot_model(type='est', transform='exp', show.values=TRUE)

loyn_glm1 |>
  parameters() |> 
  plot()

loyn_glm1 |>
  parameters(exponentiate = TRUE) |> 
  plot(show_labels = TRUE) + 
  scale_x_continuous(trans =  "log2", limits =  c(0.1,6)) 

loyn_glm2 |> plot_model(type='est')

loyn_glm2 |> plot_model(type='est', transform='exp', show.values=TRUE)

loyn_glm2 |>
  parameters() |> 
  plot()

loyn_glm2 |>
  parameters(exponentiate = TRUE) |> 
  plot(show_labels = TRUE) +
  scale_x_continuous(trans = "log2", limits =  c(0.2,6))

loyn_glm2 |>
  simulate_parameters(exponentiate = TRUE) |> 
  plot(stack = FALSE, normalize_height = TRUE)

compare_parameters(loyn_glm, loyn_glm1) |>
  plot()

compare_parameters(loyn_glm, loyn_glm1, loyn_glm2) |>
  display()
Parameter loyn_glm loyn_glm1 loyn_glm2
(Intercept) 2.79 (2.37, 3.22) 3.09 (2.87, 3.32) 2.88 (2.51, 3.25)
center(DIST) (center(log) 0.05 (-0.17, 0.26) -0.02 (-0.14, 0.10) 0.05 (-0.13, 0.24)
center(LDIST) (center(log) 0.05 (-0.12, 0.22) 0.04 (-0.05, 0.13) 2.18e-03 (-0.14, 0.15)
center(AREA) (center(log) 0.22 (0.10, 0.33) 0.11 (0.04, 0.17) 0.21 (0.11, 0.31)
fGRAZE (2) 0.23 (-0.35, 0.82) 0.04 (-0.26, 0.34) 0.16 (-0.35, 0.67)
fGRAZE (3) 0.23 (-0.30, 0.76) 0.02 (-0.26, 0.30) 0.24 (-0.22, 0.71)
fGRAZE (4) 0.10 (-0.48, 0.67) -1.20e-03 (-0.32, 0.31) 0.07 (-0.43, 0.57)
fGRAZE (5) -0.89 (-1.75, -0.03) -1.12 (-1.81, -0.43) -0.78 (-1.53, -0.03)
center(ALT) 1.32e-03 (-2.96e-03, 5.61e-03) -7.44e-05 (-2.32e-03, 2.17e-03) 1.12e-03 (-2.63e-03, 4.86e-03)
center(YR ISOL) 1.71e-03 (-8.69e-03, 0.01) -7.13e-04 (-6.57e-03, 5.14e-03) 1.58e-03 (-7.51e-03, 0.01)
Observations 56 56 56
## Check out rope
sdy <- loyn$ABUND |> log() |> sd()
loyn_glm2 |> equivalence_test(range =  c(-0.1, 0.1) * sdy) |>
  plot()

9 Model investigation / hypothesis testing

loyn_glm |> summary()

Call:
glm(formula = log(ABUND) ~ center(log(DIST)) + center(log(LDIST)) + 
    center(log(AREA)) + fGRAZE + center(ALT) + center(YR.ISOL), 
    family = gaussian(), data = loyn)

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.794641   0.209884  13.315  < 2e-16 ***
center(log(DIST))   0.048037   0.106225   0.452 0.653237    
center(log(LDIST))  0.049897   0.082636   0.604 0.548933    
center(log(AREA))   0.217187   0.058115   3.737 0.000513 ***
fGRAZE2             0.232934   0.289494   0.805 0.425177    
fGRAZE3             0.225478   0.263369   0.856 0.396363    
fGRAZE4             0.097774   0.284703   0.343 0.732844    
fGRAZE5            -0.890477   0.425336  -2.094 0.041838 *  
center(ALT)         0.001325   0.002128   0.623 0.536622    
center(YR.ISOL)     0.001710   0.005166   0.331 0.742209    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 0.2953154)

    Null deviance: 44.791  on 55  degrees of freedom
Residual deviance: 13.585  on 46  degrees of freedom
AIC: 101.6

Number of Fisher Scoring iterations: 2

Conclusions:

  • the estimated y-intercept (bird abundance on the log scale at Grazing level 1, when all other centred continuous predictors are at their average value of 0) is 2.79. Note that this is still on the link (log) scale.
  • if however, we exponentiate it (to back-transform it onto the response scale), then the bird abundance in Grazing level 1 at the average level of all other predictors is 16.36
  • associated with each of the continuous predictors is a partial slope. Each partial slope is the rate of change between bird abundance and the associated predictor (on the log scale due to the link and based on 1 unit change in the predictor on the scale of the predictor). For example, for every one unit change in centred log patch Area, bird abundance is expected to increase by (log) 0.22.
  • if we back transform this slope (by exponentiation), we get a partial slope for centred log Area of 1.24. This is interpreted as - for every 1 unit increase in (scaled log) Area, the bird abundance is expected to increase 1.24 fold. That is, there is a 24 percent increase per 1 unit increase in centred log Area.
  • on the basis of one unit level changes, Grazing level 5 (actually, the difference between Grazing level 1 and 5), has the largest relative effect on bird abundances followed by centred log Area.
  • the \(R^2\) value is 0.7. Hence, 70 percent of the variation in number of Individuals is explained by its relationship to Area.
loyn_glm |> confint()
Waiting for profiling to be done...
                          2.5 %       97.5 %
(Intercept)         2.383277032  3.206005503
center(log(DIST))  -0.160160724  0.256234688
center(log(LDIST)) -0.112067311  0.211861575
center(log(AREA))   0.103283017  0.331091188
fGRAZE2            -0.334465021  0.800332435
fGRAZE3            -0.290714978  0.741670896
fGRAZE4            -0.460233638  0.655781439
fGRAZE5            -1.724121112 -0.056833429
center(ALT)        -0.002845377  0.005494663
center(YR.ISOL)    -0.008415597  0.011834628
loyn_glm |> confint() |> exp()
Waiting for profiling to be done...
                        2.5 %     97.5 %
(Intercept)        10.8403690 24.6803037
center(log(DIST))   0.8520068  1.2920559
center(log(LDIST))  0.8939841  1.2359768
center(log(AREA))   1.1088052  1.3924868
fGRAZE2             0.7157209  2.2262809
fGRAZE3             0.7477288  2.0994405
fGRAZE4             0.6311362  1.9266475
fGRAZE5             0.1783297  0.9447514
center(ALT)         0.9971587  1.0055098
center(YR.ISOL)     0.9916197  1.0119049

We can tidy the summary table and express the coefficients on the response scale. Recall that when we do so, the partial slopes are no longer additive. Rather, they represent a multiplicative change. Furthermore, effects greater than 1 represent an increase, whereas effects less than 1 represent an increase. Hence, an effect of 0.5 represents a halving.

Note also that confidence intervals should be interpreted relative to a value of 1 after such a back-transformation.

loyn_glm |> tidy(conf.int=TRUE, exponentiate = TRUE)
# A tibble: 10 × 7
   term               estimate std.error statistic  p.value conf.low conf.high
   <chr>                 <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
 1 (Intercept)          16.4     0.210      13.3   2.16e-17   10.8      24.7  
 2 center(log(DIST))     1.05    0.106       0.452 6.53e- 1    0.852     1.29 
 3 center(log(LDIST))    1.05    0.0826      0.604 5.49e- 1    0.894     1.24 
 4 center(log(AREA))     1.24    0.0581      3.74  5.13e- 4    1.11      1.39 
 5 fGRAZE2               1.26    0.289       0.805 4.25e- 1    0.716     2.23 
 6 fGRAZE3               1.25    0.263       0.856 3.96e- 1    0.748     2.10 
 7 fGRAZE4               1.10    0.285       0.343 7.33e- 1    0.631     1.93 
 8 fGRAZE5               0.410   0.425      -2.09  4.18e- 2    0.178     0.945
 9 center(ALT)           1.00    0.00213     0.623 5.37e- 1    0.997     1.01 
10 center(YR.ISOL)       1.00    0.00517     0.331 7.42e- 1    0.992     1.01 
loyn_glm |> glance()
# A tibble: 1 × 8
  null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
          <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
1          44.8      55  -39.8  102.  124.     13.6          46    56

The magnitude of (partial) slopes are dependent on the scale of each of the underlying predictors. Therefore, we cannot glean the relative importance of each of the predictors relative to each other simply by comparing their partial slopes. We could refit the model including scaled (and centred) versions of the predictors - this would put all partial slopes on a common scale thereby permitting comparisons. However, this would then make the individual partial slopes difficult to interpret with respect to the observed predictor scale. For example, what does a change of one unit on a scaled scale mean in real world measurements?

The alternative is to fit the model with unscaled (yet centred) predictors (as we have done) and then standardise the partial slopes by multiplication with the respective predictor standard deviations divided by the standard deviation of the response. This can be performed by the std.coef() function in the MuMIn package. This function can also correct for a certain degree of (multi)collinearity by standardising by partial standard deviation.

loyn_glm |> std.coef(partial.sd=TRUE)
                   Estimate* Std. Error* df
(Intercept)         0.000000    0.000000 46
center(log(DIST))   0.032840    0.072619 46
center(log(LDIST))  0.043848    0.072619 46
center(log(AREA))   0.271389    0.072619 46
fGRAZE2             0.058431    0.072619 46
fGRAZE3             0.062171    0.072619 46
fGRAZE4             0.024939    0.072619 46
fGRAZE5            -0.152034    0.072619 46
center(ALT)         0.045212    0.072619 46
center(YR.ISOL)     0.024031    0.072619 46
loyn_glm |> parameters() |> display()
Parameter Coefficient SE 95% CI t(46) p
(Intercept) 2.79 0.21 (2.38, 3.21) 13.32 < .001
center(DIST) (center(log) 0.05 0.11 (-0.16, 0.26) 0.45 0.651
center(LDIST) (center(log) 0.05 0.08 (-0.11, 0.21) 0.60 0.546
center(AREA) (center(log) 0.22 0.06 (0.10, 0.33) 3.74 < .001
fGRAZE (2) 0.23 0.29 (-0.33, 0.80) 0.80 0.421
fGRAZE (3) 0.23 0.26 (-0.29, 0.74) 0.86 0.392
fGRAZE (4) 0.10 0.28 (-0.46, 0.66) 0.34 0.731
fGRAZE (5) -0.89 0.43 (-1.72, -0.06) -2.09 0.036
center(ALT) 1.32e-03 2.13e-03 (-2.85e-03, 5.49e-03) 0.62 0.534
center(YR ISOL) 1.71e-03 5.17e-03 (-8.42e-03, 0.01) 0.33 0.741
loyn_glm |> parameters(exponentiate = TRUE) |> display()
Parameter Coefficient SE 95% CI t(46) p
(Intercept) 16.36 3.43 (10.84, 24.68) 13.32 < .001
center(DIST) (center(log) 1.05 0.11 (0.85, 1.29) 0.45 0.651
center(LDIST) (center(log) 1.05 0.09 (0.89, 1.24) 0.60 0.546
center(AREA) (center(log) 1.24 0.07 (1.11, 1.39) 3.74 < .001
fGRAZE (2) 1.26 0.37 (0.72, 2.23) 0.80 0.421
fGRAZE (3) 1.25 0.33 (0.75, 2.10) 0.86 0.392
fGRAZE (4) 1.10 0.31 (0.63, 1.93) 0.34 0.731
fGRAZE (5) 0.41 0.17 (0.18, 0.94) -2.09 0.036
center(ALT) 1.00 2.13e-03 (1.00, 1.01) 0.62 0.534
center(YR ISOL) 1.00 5.17e-03 (0.99, 1.01) 0.33 0.741
loyn_glm |> 
  standardise_parameters(method =  "basic") |>
  display()
# Standardization method: basic
Parameter Std. Coef. 95% CI
(Intercept) 0.00 [ 0.00, 0.00]
center(log(DIST)) 0.05 [-0.17, 0.27]
center(log(LDIST)) 0.07 [-0.16, 0.31]
center(log(AREA)) 0.45 [ 0.21, 0.69]
fGRAZE2 0.09 [-0.13, 0.31]
fGRAZE3 0.11 [-0.14, 0.37]
fGRAZE4 0.04 [-0.17, 0.24]
fGRAZE5 -0.42 [-0.81, -0.03]
center(ALT) 0.06 [-0.14, 0.27]
center(YR.ISOL) 0.05 [-0.24, 0.34]
loyn_glm |> 
  standardise_parameters(method =  "basic") |>
  plot() 

loyn_glm |>
  standardise_parameters(method =  "basic") |> 
  equivalence_test() |> plot()

loyn_glm |>
  equivalence_test(range =  c(-0.1,0.1) * sd(log(loyn$ABUND))) |>
  plot()

loyn_glm1 |> summary()

Call:
glm(formula = ABUND ~ center(log(DIST)) + center(log(LDIST)) + 
    center(log(AREA)) + fGRAZE + center(ALT) + center(YR.ISOL), 
    family = gaussian(link = "log"), data = loyn)

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)         3.092e+00  1.129e-01  27.400  < 2e-16 ***
center(log(DIST))  -1.896e-02  6.008e-02  -0.316  0.75373    
center(log(LDIST))  4.313e-02  4.531e-02   0.952  0.34623    
center(log(AREA))   1.090e-01  3.187e-02   3.421  0.00132 ** 
fGRAZE2             3.964e-02  1.490e-01   0.266  0.79134    
fGRAZE3             1.965e-02  1.378e-01   0.143  0.88717    
fGRAZE4            -1.197e-03  1.562e-01  -0.008  0.99392    
fGRAZE5            -1.122e+00  3.436e-01  -3.264  0.00208 ** 
center(ALT)        -7.436e-05  1.117e-03  -0.067  0.94719    
center(YR.ISOL)    -7.133e-04  2.909e-03  -0.245  0.80737    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 42.40928)

    Null deviance: 6337.9  on 55  degrees of freedom
Residual deviance: 1950.8  on 46  degrees of freedom
AIC: 379.76

Number of Fisher Scoring iterations: 6

Conclusions:

  • the estimated y-intercept (bird abundance on the log scale at Grazing level 1, when all other centred continuous predictors are at their average value of 0) is 3.09. Note that this is still on the link (log) scale.
  • if however, we exponentiate it (to back-transform it onto the response scale), then the bird abundance in Grazing level 1 at the average level of all other predictors is 22.03
  • associated with each of the continuous predictors is a partial slope. Each partial slope is the rate of change between bird abundance and the associated predictor (on the log scale due to the link and based on 1 unit change in the predictor on the scale of the predictor). For example, for every one unit change in centred log patch Area, bird abundance is expected to increase by (log) 0.11.
  • if we back transform this slope (by exponentiation), we get a partial slope for centred log Area of 1.12. This is interpreted as - for every 1 unit increase in (scaled log) Area, the bird abundance is expected to increase 1.12 fold. That is, there is a 12 percent increase per 1 unit increase in centred log Area.
  • on the basis of one unit level changes, Grazing level 5 (actually, the difference between Grazing level 1 and 5), has the largest relative effect on bird abundances followed by centred log Area.
  • the \(R^2\) value is 0.69. Hence, 69 percent of the variation in number of Individuals is explained by its relationship to Area.
loyn_glm1 |> confint()
Waiting for profiling to be done...
                          2.5 %       97.5 %
(Intercept)         2.857373688  3.304173722
center(log(DIST))  -0.137764975  0.103001378
center(log(LDIST)) -0.048993891  0.130399690
center(log(AREA))   0.048471352  0.172908096
fGRAZE2            -0.270270322  0.329472512
fGRAZE3            -0.253401689  0.291835537
fGRAZE4            -0.328680891  0.304845014
fGRAZE5            -1.964415830 -0.489194377
center(ALT)        -0.002307881  0.002204718
center(YR.ISOL)    -0.006029603  0.005436731
loyn_glm1 |> confint() |> exp()
Waiting for profiling to be done...
                        2.5 %     97.5 %
(Intercept)        17.4157277 27.2260360
center(log(DIST))   0.8713034  1.1084929
center(log(LDIST))  0.9521869  1.1392837
center(log(AREA))   1.0496653  1.1887568
fGRAZE2             0.7631732  1.3902346
fGRAZE3             0.7761560  1.3388828
fGRAZE4             0.7198727  1.3564148
fGRAZE5             0.1402378  0.6131201
center(ALT)         0.9976948  1.0022072
center(YR.ISOL)     0.9939885  1.0054515

We can tidy the summary table and express the coefficients on the response scale. Recall that when we do so, the partial slopes are no longer additive. Rather, they represent a multiplicative change. Furthermore, effects greater than 1 represent an increase, whereas effects less than 1 represent an increase. Hence, an effect of 0.5 represents a halving.

Note also that confidence intervals should be interpreted relative to a value of 1 after such a back-transformation.

loyn_glm1 |> tidy(conf.int=TRUE, exponentiate = TRUE)
# A tibble: 10 × 7
   term               estimate std.error statistic  p.value conf.low conf.high
   <chr>                 <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
 1 (Intercept)          22.0     0.113    27.4     3.92e-30   17.4      27.2  
 2 center(log(DIST))     0.981   0.0601   -0.316   7.54e- 1    0.871     1.11 
 3 center(log(LDIST))    1.04    0.0453    0.952   3.46e- 1    0.952     1.14 
 4 center(log(AREA))     1.12    0.0319    3.42    1.32e- 3    1.05      1.19 
 5 fGRAZE2               1.04    0.149     0.266   7.91e- 1    0.763     1.39 
 6 fGRAZE3               1.02    0.138     0.143   8.87e- 1    0.776     1.34 
 7 fGRAZE4               0.999   0.156    -0.00766 9.94e- 1    0.720     1.36 
 8 fGRAZE5               0.326   0.344    -3.26    2.08e- 3    0.140     0.613
 9 center(ALT)           1.00    0.00112  -0.0666  9.47e- 1    0.998     1.00 
10 center(YR.ISOL)       0.999   0.00291  -0.245   8.07e- 1    0.994     1.01 
loyn_glm1 |> glance()
# A tibble: 1 × 8
  null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
          <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
1         6338.      55  -179.  380.  402.    1951.          46    56

The magnitude of (partial) slopes are dependent on the scale of each of the underlying predictors. Therefore, we cannot glean the relative importance of each of the predictors relative to each other simply by comparing their partial slopes. We could refit the model including scaled (and centred) versions of the predictors - this would put all partial slopes on a common scale thereby permitting comparisons. However, this would then make the individual partial slopes difficult to interpret with respect to the observed predictor scale. For example, what does a change of one unit on a scaled scale mean in real world measurements?

The alternative is to fit the model with unscaled (yet centred) predictors (as we have done) and then standardise the partial slopes by multiplication with the respective predictor standard deviations divided by the standard deviation of the response. This can be performed by the std.coef() function in the MuMIn package. This function can also correct for a certain degree of (multi)collinearity by standardising by partial standard deviation.

loyn_glm1 |> std.coef(partial.sd=TRUE)
                     Estimate* Std. Error* df
(Intercept)         0.00000000  0.00000000 46
center(log(DIST))  -0.01277395  0.04047501 46
center(log(LDIST))  0.03995587  0.04198390 46
center(log(AREA))   0.13271817  0.03879196 46
fGRAZE2             0.01019205  0.03830019 46
fGRAZE3             0.00573239  0.04017770 46
fGRAZE4            -0.00031449  0.04103197 46
fGRAZE5            -0.37566409  0.11509812 46
center(ALT)        -0.00259578  0.03898018 46
center(YR.ISOL)    -0.01449969  0.05912696 46
loyn_glm1 |> parameters() |> display()
Parameter Coefficient SE 95% CI t(46) p
(Intercept) 3.09 0.11 (2.86, 3.30) 27.40 < .001
center(DIST) (center(log) -0.02 0.06 (-0.14, 0.10) -0.32 0.752
center(LDIST) (center(log) 0.04 0.05 (-0.05, 0.13) 0.95 0.341
center(AREA) (center(log) 0.11 0.03 (0.05, 0.17) 3.42 < .001
fGRAZE (2) 0.04 0.15 (-0.27, 0.33) 0.27 0.790
fGRAZE (3) 0.02 0.14 (-0.25, 0.29) 0.14 0.887
fGRAZE (4) -1.20e-03 0.16 (-0.33, 0.30) -7.66e-03 0.994
fGRAZE (5) -1.12 0.34 (-1.96, -0.49) -3.26 0.001
center(ALT) -7.44e-05 1.12e-03 (-2.31e-03, 2.20e-03) -0.07 0.947
center(YR ISOL) -7.13e-04 2.91e-03 (-6.03e-03, 5.44e-03) -0.25 0.806
loyn_glm1 |> parameters(exponentiate = TRUE) |> display()
Parameter Coefficient SE 95% CI t(46) p
(Intercept) 22.03 2.49 (17.42, 27.23) 27.40 < .001
center(DIST) (center(log) 0.98 0.06 (0.87, 1.11) -0.32 0.752
center(LDIST) (center(log) 1.04 0.05 (0.95, 1.14) 0.95 0.341
center(AREA) (center(log) 1.12 0.04 (1.05, 1.19) 3.42 < .001
fGRAZE (2) 1.04 0.16 (0.76, 1.39) 0.27 0.790
fGRAZE (3) 1.02 0.14 (0.78, 1.34) 0.14 0.887
fGRAZE (4) 1.00 0.16 (0.72, 1.36) -7.66e-03 0.994
fGRAZE (5) 0.33 0.11 (0.14, 0.61) -3.26 0.001
center(ALT) 1.00 1.12e-03 (1.00, 1.00) -0.07 0.947
center(YR ISOL) 1.00 2.91e-03 (0.99, 1.01) -0.25 0.806
loyn_glm1 |> 
  standardise_parameters(method =  "basic") |>
  display()
# Standardization method: basic
Parameter Std. Coef. 95% CI
(Intercept) 0.00 [ 0.00, 0.00]
center(log(DIST)) -1.68e-03 [-0.01, 0.01]
center(log(LDIST)) 5.32e-03 [-0.01, 0.02]
center(log(AREA)) 0.02 [ 0.01, 0.03]
fGRAZE2 1.30e-03 [-0.01, 0.01]
fGRAZE3 8.18e-04 [-0.01, 0.01]
fGRAZE4 -3.72e-05 [-0.01, 0.01]
fGRAZE5 -0.04 [-0.08, -0.02]
center(ALT) -3.02e-04 [-0.01, 0.01]
center(YR.ISOL) -1.70e-03 [-0.01, 0.01]
loyn_glm1 |> 
  standardise_parameters(method =  "basic") |>
  plot() 

loyn_glm1 |>
  standardise_parameters(method =  "basic") |> 
  equivalence_test() |> plot()

loyn_glm1 |>
  equivalence_test(range =  c(-0.1,0.1) * sd(log(loyn$ABUND))) |>
  plot()

loyn_glm2 |> summary()

Call:
glm(formula = ABUND ~ center(log(DIST)) + center(log(LDIST)) + 
    center(log(AREA)) + fGRAZE + center(ALT) + center(YR.ISOL), 
    family = Gamma(link = "log"), data = loyn)

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)         2.884175   0.183515  15.716  < 2e-16 ***
center(log(DIST))   0.052006   0.092880   0.560 0.578246    
center(log(LDIST))  0.002178   0.072255   0.030 0.976083    
center(log(AREA))   0.211406   0.050814   4.160 0.000137 ***
fGRAZE2             0.157832   0.253125   0.624 0.536013    
fGRAZE3             0.242847   0.230281   1.055 0.297130    
fGRAZE4             0.073447   0.248935   0.295 0.769289    
fGRAZE5            -0.776904   0.371900  -2.089 0.042267 *  
center(ALT)         0.001120   0.001860   0.602 0.550250    
center(YR.ISOL)     0.001578   0.004517   0.349 0.728467    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Gamma family taken to be 0.2257743)

    Null deviance: 31.061  on 55  degrees of freedom
Residual deviance: 11.960  on 46  degrees of freedom
AIC: 398.17

Number of Fisher Scoring iterations: 7

Conclusions:

  • the estimated y-intercept (bird abundance on the log scale at Grazing level 1, when all other centred continuous predictors are at their average value of 0) is 2.88. Note that this is still on the link (log) scale.
  • if however, we exponentiate it (to back-transform it onto the response scale), then the bird abundance in Grazing level 1 at the average level of all other predictors is 17.89
  • associated with each of the continuous predictors is a partial slope. Each partial slope is the rate of change between bird abundance and the associated predictor (on the log scale due to the link and based on 1 unit change in the predictor on the scale of the predictor). For example, for every one unit change in centred log patch Area, bird abundance is expected to increase by (log) 0.21.
  • if we back transform this slope (by exponentiation), we get a partial slope for centred log Area of 1.24. This is interpreted as - for every 1 unit increase in (scaled log) Area, the bird abundance is expected to increase 1.24 fold. That is, there is a 24 percent increase per 1 unit increase in centred log Area.
  • on the basis of one unit level changes, Grazing level 5 (actually, the difference between Grazing level 1 and 5), has the largest relative effect on bird abundances followed by centred log Area.
  • the \(R^2\) value is 0.64. Hence, 64 percent of the variation in number of Individuals is explained by its relationship to Area.
loyn_glm2 |> confint()
Waiting for profiling to be done...
                          2.5 %       97.5 %
(Intercept)         2.541390668  3.247318504
center(log(DIST))  -0.136276672  0.231770632
center(log(LDIST)) -0.139092639  0.148026636
center(log(AREA))   0.104123690  0.319147655
fGRAZE2            -0.325629378  0.655552869
fGRAZE3            -0.216533500  0.701556277
fGRAZE4            -0.396478859  0.558696246
fGRAZE5            -1.490328207 -0.078310395
center(ALT)        -0.002428384  0.004625928
center(YR.ISOL)    -0.007727843  0.010332742
loyn_glm2 |> confint() |> exp()
Waiting for profiling to be done...
                        2.5 %     97.5 %
(Intercept)        12.6973164 25.7212759
center(log(DIST))   0.8726012  1.2608305
center(log(LDIST))  0.8701474  1.1595438
center(log(AREA))   1.1097377  1.3759545
fGRAZE2             0.7220728  1.9262072
fGRAZE3             0.8053056  2.0168891
fGRAZE4             0.6726845  1.7483915
fGRAZE5             0.2252987  0.9246774
center(ALT)         0.9975746  1.0046366
center(YR.ISOL)     0.9923019  1.0103863

We can tidy the summary table and express the coefficients on the response scale. Recall that when we do so, the partial slopes are no longer additive. Rather, they represent a multiplicative change. Furthermore, effects greater than 1 represent an increase, whereas effects less than 1 represent an increase. Hence, an effect of 0.5 represents a halving.

Note also that confidence intervals should be interpreted relative to a value of 1 after such a back-transformation.

loyn_glm2 |> tidy(conf.int=TRUE, exponentiate = TRUE)
# A tibble: 10 × 7
   term               estimate std.error statistic  p.value conf.low conf.high
   <chr>                 <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
 1 (Intercept)          17.9     0.184     15.7    4.07e-20   12.7      25.7  
 2 center(log(DIST))     1.05    0.0929     0.560  5.78e- 1    0.873     1.26 
 3 center(log(LDIST))    1.00    0.0723     0.0301 9.76e- 1    0.870     1.16 
 4 center(log(AREA))     1.24    0.0508     4.16   1.37e- 4    1.11      1.38 
 5 fGRAZE2               1.17    0.253      0.624  5.36e- 1    0.722     1.93 
 6 fGRAZE3               1.27    0.230      1.05   2.97e- 1    0.805     2.02 
 7 fGRAZE4               1.08    0.249      0.295  7.69e- 1    0.673     1.75 
 8 fGRAZE5               0.460   0.372     -2.09   4.23e- 2    0.225     0.925
 9 center(ALT)           1.00    0.00186    0.602  5.50e- 1    0.998     1.00 
10 center(YR.ISOL)       1.00    0.00452    0.349  7.28e- 1    0.992     1.01 
loyn_glm2 |> glance()
# A tibble: 1 × 8
  null.deviance df.null logLik   AIC   BIC deviance df.residual  nobs
          <dbl>   <int>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
1          31.1      55  -188.  398.  420.     12.0          46    56

The magnitude of (partial) slopes are dependent on the scale of each of the underlying predictors. Therefore, we cannot glean the relative importance of each of the predictors relative to each other simply by comparing their partial slopes. We could refit the model including scaled (and centred) versions of the predictors - this would put all partial slopes on a common scale thereby permitting comparisons. However, this would then make the individual partial slopes difficult to interpret with respect to the observed predictor scale. For example, what does a change of one unit on a scaled scale mean in real world measurements?

The alternative is to fit the model with unscaled (yet centred) predictors (as we have done) and then standardise the partial slopes by multiplication with the respective predictor standard deviations divided by the standard deviation of the response. This can be performed by the std.coef() function in the MuMIn package. This function can also correct for a certain degree of (multi)collinearity by standardising by partial standard deviation.

loyn_glm2 |> std.coef(partial.sd=TRUE)
                   Estimate* Std. Error* df
(Intercept)         0.000000    0.000000 46
center(log(DIST))   0.035553    0.063495 46
center(log(LDIST))  0.001914    0.063495 46
center(log(AREA))   0.264165    0.063495 46
fGRAZE2             0.039592    0.063495 46
fGRAZE3             0.066960    0.063495 46
fGRAZE4             0.018734    0.063495 46
fGRAZE5            -0.132643    0.063495 46
center(ALT)         0.038213    0.063495 46
center(YR.ISOL)     0.022178    0.063495 46
loyn_glm2 |> parameters() |> display()
Parameter Log-Prevalence SE 95% CI t(46) p
(Intercept) 2.88 0.18 (2.54, 3.25) 15.72 < .001
center(DIST) (center(log) 0.05 0.09 (-0.14, 0.23) 0.56 0.576
center(LDIST) (center(log) 2.18e-03 0.07 (-0.14, 0.15) 0.03 0.976
center(AREA) (center(log) 0.21 0.05 (0.10, 0.32) 4.16 < .001
fGRAZE (2) 0.16 0.25 (-0.33, 0.66) 0.62 0.533
fGRAZE (3) 0.24 0.23 (-0.22, 0.70) 1.05 0.292
fGRAZE (4) 0.07 0.25 (-0.40, 0.56) 0.30 0.768
fGRAZE (5) -0.78 0.37 (-1.49, -0.08) -2.09 0.037
center(ALT) 1.12e-03 1.86e-03 (-2.43e-03, 4.63e-03) 0.60 0.547
center(YR ISOL) 1.58e-03 4.52e-03 (-7.73e-03, 0.01) 0.35 0.727
loyn_glm2 |> parameters(exponentiate = TRUE) |> display()
Parameter Prevalence Ratio SE 95% CI t(46) p
(Intercept) 17.89 3.28 (12.70, 25.72) 15.72 < .001
center(DIST) (center(log) 1.05 0.10 (0.87, 1.26) 0.56 0.576
center(LDIST) (center(log) 1.00 0.07 (0.87, 1.16) 0.03 0.976
center(AREA) (center(log) 1.24 0.06 (1.11, 1.38) 4.16 < .001
fGRAZE (2) 1.17 0.30 (0.72, 1.93) 0.62 0.533
fGRAZE (3) 1.27 0.29 (0.81, 2.02) 1.05 0.292
fGRAZE (4) 1.08 0.27 (0.67, 1.75) 0.30 0.768
fGRAZE (5) 0.46 0.17 (0.23, 0.92) -2.09 0.037
center(ALT) 1.00 1.86e-03 (1.00, 1.00) 0.60 0.547
center(YR ISOL) 1.00 4.52e-03 (0.99, 1.01) 0.35 0.727
loyn_glm2 |> 
  standardise_parameters(method =  "basic") |>
  display()
# Standardization method: basic
Parameter Std. Coef. 95% CI
(Intercept) 0.00 [ 0.00, 0.00]
center(log(DIST)) 0.05 [-0.13, 0.22]
center(log(LDIST)) 2.88e-03 [-0.18, 0.20]
center(log(AREA)) 0.40 [ 0.19, 0.60]
fGRAZE2 0.06 [-0.11, 0.23]
fGRAZE3 0.11 [-0.10, 0.31]
fGRAZE4 0.02 [-0.13, 0.19]
fGRAZE5 -0.33 [-0.63, -0.03]
center(ALT) 0.05 [-0.11, 0.20]
center(YR.ISOL) 0.04 [-0.20, 0.26]
  • Response is unstandardized.
loyn_glm2 |> 
  standardise_parameters(method =  "basic") |>
  plot() 

loyn_glm2 |>
  standardise_parameters(method =  "basic") |> 
  equivalence_test() |> plot()

loyn_glm2 |>
  equivalence_test(range =  c(-0.1,0.1) * sd(log(loyn$ABUND))) |>
  plot()

10 Further analyses

From this point on, we will only proceed with the Gaussian (log-link) model.

loyn_glm1 |> MuMIn::r.squaredLR()
[1] 0.6922071
attr(,"adj.r.squared")
[1] 0.6925654
loyn_glm1 |> AIC()
[1] 379.7562
loyn_glm1 |> AICc()
[1] 385.7562

So far, the model that we have fit includes six predictors. If we had more predictors, providing the predictors were not correlated to any other predictor, we could add additional predictors in an attempt to further reduce the unexplained variance. However, as we do so, the complexity of the model increases (particularly if we start introducing interactions).

If we keep in mind that linear models are intentionally low dimensional representations that are intended to provide insights into the effects of major drivers and are not intended to reconstruct the full system complexity, we should realise that proposing overly complex models goes contrary to these intentions. Even the model we already have might be considered overly complex. Some of the predictors were found to have very little impact on the response, so a far question might be whether all the predictors really are necessary.

It is possible to fit all combinations of models containing the predictors and then comparing the fit of each of the models via information criterion. This is referred to as dredging.

# Option 1 - dredge
loyn_glm1 <- loyn_glm1 |> update(na.action=na.fail)
#options(width=1000)
loyn_glm1 |> dredge(rank = "AICc")
Fixed term is "(Intercept)"
Global model call: glm(formula = ABUND ~ center(log(DIST)) + center(log(LDIST)) + 
    center(log(AREA)) + fGRAZE + center(ALT) + center(YR.ISOL), 
    family = gaussian(link = "log"), data = loyn, na.action = na.fail)
---
Model selection table 
   (Int)   cnt(ALT) cnt(log(ARE)) cnt(log(DIS)) cnt(log(LDI)) cnt(YR.ISO) fGR
35 3.081                   0.1172                                           +
43 3.072                   0.1105                     0.03594               +
36 3.080 -4.274e-04        0.1216                                           +
39 3.074                   0.1164      0.012330                             +
51 3.082                   0.1170                              -6.439e-05   +
47 3.081                   0.1102     -0.018740       0.04233               +
59 3.082                   0.1084                     0.03788  -6.756e-04   +
44 3.072  1.617e-05        0.1103                     0.03621               +
52 3.088 -4.775e-04        0.1208                              -4.767e-04   +
40 3.077 -3.926e-04        0.1208      0.006492                             +
55 3.076                   0.1161      0.012580                -1.305e-04   +
63 3.091                   0.1082     -0.018680       0.04414  -6.670e-04   +
48 3.081 -6.443e-06        0.1103     -0.018760       0.04223               +
60 3.083 -4.806e-05        0.1089                     0.03715  -7.046e-04   +
56 3.085 -4.423e-04        0.1201      0.006559                -4.786e-04   +
19 2.908                   0.1471                               8.913e-03    
64 3.092 -7.436e-05        0.1090     -0.018960       0.04313  -7.133e-04   +
20 2.909  8.318e-04        0.1388                               8.837e-03    
27 2.907                   0.1537                    -0.02476   8.771e-03    
23 2.908                   0.1507     -0.023050                 8.813e-03    
28 2.908  6.335e-04        0.1444                    -0.01399   8.791e-03    
24 2.909  7.494e-04        0.1418     -0.013860                 8.792e-03    
31 2.907                   0.1541     -0.008552      -0.02125   8.752e-03    
41 3.288                                              0.08719               +
32 2.908  6.306e-04        0.1449     -0.008050      -0.01082   8.774e-03    
42 3.253  1.567e-03                                   0.10140               +
57 3.334                                              0.09001  -3.598e-03   +
45 3.306                              -0.036450       0.09897               +
33 3.354                                                                    +
58 3.293  1.259e-03                                   0.10150  -2.688e-03   +
46 3.271  1.532e-03                   -0.031040       0.11070               +
49 3.397                                                       -3.375e-03   +
61 3.346                              -0.027490       0.09841  -3.455e-03   +
34 3.339  8.770e-04                                                         +
37 3.321                               0.044540                             +
3  2.938                   0.1601                                            
62 3.305  1.240e-03                   -0.023580       0.10820  -2.558e-03   +
53 3.362                               0.051720                -3.601e-03   +
50 3.381  5.471e-04                                            -2.936e-03   +
38 3.293  1.054e-03                    0.054760                             +
11 2.941                   0.1665                    -0.04158                
7  2.938                   0.1664     -0.045510                              
4  2.942  8.101e-04        0.1491                                            
54 3.335  7.315e-04                    0.058780                -3.070e-03   +
15 2.941                   0.1683     -0.020870      -0.03395                
12 2.942  1.314e-04        0.1643                    -0.03893                
8  2.941  5.702e-04        0.1574     -0.036200                              
16 2.941  1.192e-04        0.1662     -0.020680      -0.03166                
26 2.927  3.664e-03                                   0.10890   1.009e-02    
30 2.927  3.734e-03                    0.032700       0.09794   9.890e-03    
22 2.930  3.478e-03                    0.106200                 9.387e-03    
18 2.933  3.148e-03                                             9.642e-03    
25 2.921                                              0.09188   1.325e-02    
17 2.931                                                        1.212e-02    
29 2.921                               0.008304       0.08867   1.321e-02    
21 2.927                               0.082230                 1.225e-02    
10 2.944  5.322e-03                                   0.10760                
6  2.948  4.823e-03                    0.128200                              
14 2.942  5.289e-03                    0.068840       0.08715                
2  2.952  4.613e-03                                                          
1  2.971                                                                     
5  2.968                               0.084070                              
9  2.969                                              0.04884                
13 2.967                               0.061350       0.02696                
   df   logLik  AICc delta weight
35  7 -179.540 375.4  0.00  0.343
43  8 -178.972 377.0  1.59  0.154
36  8 -179.431 377.9  2.51  0.098
39  8 -179.507 378.1  2.67  0.090
51  8 -179.540 378.1  2.73  0.088
47  9 -178.914 379.7  4.33  0.039
59  9 -178.938 379.8  4.37  0.038
44  9 -178.972 379.9  4.44  0.037
52  9 -179.415 380.7  5.33  0.024
40  9 -179.422 380.8  5.34  0.024
55  9 -179.506 380.9  5.51  0.022
63 10 -178.881 382.7  7.24  0.009
48 10 -178.914 382.7  7.30  0.009
60 10 -178.937 382.8  7.35  0.009
56 10 -179.407 383.7  8.29  0.005
19  4 -187.935 384.7  9.24  0.003
64 11 -178.878 385.8 10.34  0.002
20  5 -187.644 386.5 11.08  0.001
27  5 -187.718 386.6 11.22  0.001
23  5 -187.825 386.9 11.44  0.001
28  6 -187.591 388.9 13.48  0.000
24  6 -187.607 388.9 13.51  0.000
31  6 -187.708 389.1 13.72  0.000
41  7 -187.292 390.9 15.50  0.000
32  7 -187.582 391.5 16.08  0.000
42  8 -186.248 391.6 16.15  0.000
57  8 -186.475 392.0 16.60  0.000
45  8 -187.134 393.3 17.92  0.000
33  6 -189.827 393.4 17.96  0.000
58  9 -185.803 393.5 18.10  0.000
46  9 -186.134 394.2 18.77  0.000
49  7 -189.165 394.7 19.25  0.000
61  9 -186.386 394.7 19.27  0.000
34  7 -189.513 395.4 19.95  0.000
37  7 -189.529 395.4 19.98  0.000
3   3 -194.855 396.2 20.76  0.000
62 10 -185.737 396.4 20.95  0.000
53  8 -188.773 396.6 21.20  0.000
50  8 -189.044 397.2 21.74  0.000
38  8 -189.084 397.2 21.82  0.000
11  4 -194.362 397.5 22.09  0.000
7   4 -194.574 397.9 22.52  0.000
4   4 -194.637 398.1 22.64  0.000
54  9 -188.559 399.0 23.62  0.000
15  5 -194.319 399.8 24.42  0.000
12  5 -194.358 399.9 24.50  0.000
8   5 -194.478 400.2 24.74  0.000
16  6 -194.316 402.3 26.93  0.000
26  5 -198.958 409.1 33.70  0.000
30  6 -198.828 411.4 35.96  0.000
22  5 -200.492 412.2 36.77  0.000
18  4 -202.053 412.9 37.48  0.000
25  4 -202.058 412.9 37.49  0.000
17  3 -204.128 414.7 39.30  0.000
29  5 -202.051 415.3 39.89  0.000
21  4 -203.269 415.3 39.91  0.000
10  4 -205.470 419.7 44.31  0.000
6   4 -205.957 420.7 45.28  0.000
14  5 -205.071 421.3 45.93  0.000
2   3 -207.477 421.4 46.00  0.000
1   2 -211.871 428.0 52.56  0.000
5   3 -211.361 429.2 53.77  0.000
9   3 -211.482 429.4 54.01  0.000
13  4 -211.287 431.4 55.95  0.000
Models ranked by AICc(x) 

Conclusions:

  • each of the 64 combinations of model have been run and then ranked from lowest AICc to highest.
  • each of the models included an intercept - the estimated value of which is listed in the (Intercept) column of the output.
  • the ‘best’ model (model with lowest AICc), includes (all on log scale):
    • intercept
    • fGRAZE
    • scale(log(AREA))

We might consider the model with the lowest AICc to be the ‘best’ model. Any other models that are within 2 units would be considered equivalent and thus we might consider the ‘best’ model to be the model with the fewest used degrees of freedom (simplicity) that is within 2 units of the model with the lowest AICc. In this case, the model with the lowest AICc is also the most simple of the top ranking models - and thus the choice might seem clear.

The problem with this approach is that mathematically, one model must bubble up to be the ‘best’ model. This model is not necessarily the most sensible or appropriate model, it is just the model that the data supports most strongly.

It is also a form of ‘p-hacking’ in which an exhaustive set of models are fit in order to find something that is ‘significant’. Given that the probability of a type I error (finding a significant effect when it does not occur) for any single model is 0.05 (5%) and that this compounds with multiple tests, the ‘best’ model might actually be a type I error.

When we run the full model, the partial effects are the estimated effects of each predictor, holding the remaining predictors constant. In this way, each of the partial effects is an effect standardising across all the other predictors. This means that the value (and uncertainty) of any estimated partial slope is partly dependent on which other predictors are present in the model.

Given this, it might be interesting to explore a range of models and use the average of their parameter estimates. This is called model averaging. Rather than average across all models, we instead only average across models with reasonable fit. We can consider well fitting models as those that are within (for example) 4 units of AICc. Furthermore, rather than simply average all the models together equally, we can weight the averages by their relative AICc such that models with lower AICc have higher weights.

## 2. model averaging
loyn.av <- loyn_glm1 |>
    dredge(rank = "AICc") |>
    model.avg(subset=delta<=4)
Fixed term is "(Intercept)"
Fixed term is "(Intercept)"
loyn.av |> summary()

Call:
model.avg(object = dredge(loyn_glm1, rank = "AICc"), subset = delta <= 
    4)

Component model call: 
glm(formula = ABUND ~ <5 unique rhs>, family = gaussian(link = "log"), 
     data = loyn, na.action = na.fail)

Component models: 
    df  logLik   AICc delta weight
26   7 -179.54 375.41  0.00   0.44
246  8 -178.97 377.01  1.59   0.20
126  8 -179.43 377.93  2.51   0.13
236  8 -179.51 378.08  2.67   0.12
256  8 -179.54 378.14  2.73   0.11

Term codes: 
       center(ALT)  center(log(AREA))  center(log(DIST)) center(log(LDIST)) 
                 1                  2                  3                  4 
   center(YR.ISOL)             fGRAZE 
                 5                  6 

Model-averaged coefficients:  
(full average) 
                     Estimate Std. Error Adjusted SE z value Pr(>|z|)    
(Intercept)         3.078e+00  9.609e-02   9.849e-02  31.253  < 2e-16 ***
center(log(AREA))   1.163e-01  2.727e-02   2.794e-02   4.162 3.16e-05 ***
fGRAZE2             4.550e-02  1.311e-01   1.344e-01   0.339 0.734893    
fGRAZE3             2.213e-02  1.224e-01   1.254e-01   0.176 0.859954    
fGRAZE4            -1.761e-02  1.482e-01   1.519e-01   0.116 0.907685    
fGRAZE5            -1.047e+00  2.919e-01   2.992e-01   3.499 0.000467 ***
center(log(LDIST))  7.184e-03  2.162e-02   2.193e-02   0.328 0.743195    
center(ALT)        -5.399e-05  3.744e-04   3.826e-04   0.141 0.887770    
center(log(DIST))   1.443e-03  1.781e-02   1.824e-02   0.079 0.936950    
center(YR.ISOL)    -7.294e-06  9.374e-04   9.612e-04   0.008 0.993945    
 
(conditional average) 
                     Estimate Std. Error Adjusted SE z value Pr(>|z|)    
(Intercept)         3.078e+00  9.609e-02   9.849e-02  31.253  < 2e-16 ***
center(log(AREA))   1.163e-01  2.727e-02   2.794e-02   4.162 3.16e-05 ***
fGRAZE2             4.550e-02  1.311e-01   1.344e-01   0.339 0.734893    
fGRAZE3             2.213e-02  1.224e-01   1.254e-01   0.176 0.859954    
fGRAZE4            -1.761e-02  1.482e-01   1.519e-01   0.116 0.907685    
fGRAZE5            -1.047e+00  2.919e-01   2.992e-01   3.499 0.000467 ***
center(log(LDIST))  3.594e-02  3.612e-02   3.704e-02   0.970 0.331901    
center(ALT)        -4.275e-04  9.749e-04   9.996e-04   0.428 0.668915    
center(log(DIST))   1.233e-02  5.076e-02   5.205e-02   0.237 0.812720    
center(YR.ISOL)    -6.439e-05  2.785e-03   2.855e-03   0.023 0.982007    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## put the following back after an update of the packages (particularly MuMIn)
## loyn.av |> confint()

Conclusions:

  • Model-average coefficients are the weighted model averages that assume that all predictors are present in all models and that when they are actually absent (e.g ALT, DIST, LDIST and YR.ISOL in the top ranked model from dredge), the estimated partial slope is assumed to be 0. This has the effect of biasing the averaged parameters towards 0.
  • Conditional average coefficients only average over the models in which the parameter (predictor) is actually present. These models have a tendency to be biased away from zero.

Arguably, if our intentions are to gain insights into what influences bird abundances in fragmented forests, rather than fit a model with a large range of predictors, it might be better to fit a number of models, each of which focuses on a different aspect of the avian ecology.

Rather than search for the single ‘best’ model, we could propose a small set of ecologically plausible candidate models and then see which ones are supported by the data. For example, in the current context of birds in fragmented forests, we could propose the following models:

  1. a model that focuses on the connectivity between patches (DIST and LDIST)
  2. a model that focuses on the patch habitats (AREA, fGRAZE)
  3. a model that focuses on the patch use and history (AREA, fGRAZE, YR.ISOL)
  4. a model that focuses on climate (ALT)
  5. a NULL model. The NULL model can be used as a reference point. Any model that has an AICc less (by at least 2 units) than (and thus better) than the NULL model must have some inferential support. Any model with AICc the same or higher than the NULL, clearly has no support and would not be considered a useful model

Note, these are all models that could be proposed prior to even collecting the data and all can be explained ecologically. By contrast, dredging by chance could suggest a model that has a combination of predictors that are very difficult to interpret and explain.

As the above models contain fewer predictors, there is now scope to include interactions.

loyn_glm1a <- loyn_glm1 |> update(.~center(log(DIST)) * center(log(LDIST)))
loyn_glm1a <- loyn_glm1 |> update(.~center(log(DIST)) * center(log(LDIST)))
loyn_glm1b <- loyn_glm1 |> update(.~center(log(AREA)) * fGRAZE)
loyn_glm1c <- loyn_glm1 |> update(.~center(log(AREA)) * fGRAZE * center(YR.ISOL))
loyn_glm1d <- loyn_glm1 |> update(.~center(ALT))
loyn_glm1d <- loyn_glm1 |> update(.~center(ALT))
loyn.null <- loyn_glm1 |> update(.~1)
AICc(loyn_glm1a, loyn_glm1b, loyn_glm1c, loyn_glm1d, loyn.null)
           df     AICc
loyn_glm1a  5 433.0175
loyn_glm1b 11 370.5744
loyn_glm1c 21 406.7344
loyn_glm1d  3 421.4164
loyn.null   2 427.9690
AICc(loyn_glm1a, loyn_glm1b, loyn_glm1d, loyn.null)
           df     AICc
loyn_glm1a  5 433.0175
loyn_glm1b 11 370.5744
loyn_glm1d  3 421.4164
loyn.null   2 427.9690
AICc(loyn_glm1a, loyn_glm1b, loyn_glm1c, loyn_glm1d, loyn.null) |>
  rownames_as_column("Model") |> 
    mutate(delta=AICc-AICc[5]) |>
    dplyr::select(Model, everything())
       Model df     AICc      delta
1 loyn_glm1a  5 433.0175   5.048501
2 loyn_glm1b 11 370.5744 -57.394659
3 loyn_glm1c 21 406.7344 -21.234636
4 loyn_glm1d  3 421.4164  -6.552658
5  loyn.null  2 427.9690   0.000000
## Support for model 2, 3 and 4, no support for model 1

Conclusions:

  • there is support for models 2, 3 and 4
  • there is very little support for a model focusing on the connectivity between patches. Perhaps the patches are sufficiently close that the lack of direct corridors is not a barrier for the birds (which can fly after all!).
  • patch habitat and history are however, potentially important drivers of bird abundances as is (to a lesser degree) altitude.
loyn_glm1b |> summary()

Call:
glm(formula = ABUND ~ center(log(AREA)) + fGRAZE + center(log(AREA)):fGRAZE, 
    family = gaussian(link = "log"), data = loyn, na.action = na.fail)

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)                3.24321    0.08978  36.126  < 2e-16 ***
center(log(AREA))          0.05475    0.03097   1.768 0.083733 .  
fGRAZE2                   -0.15645    0.13308  -1.176 0.245792    
fGRAZE3                   -0.15315    0.11314  -1.354 0.182452    
fGRAZE4                   -0.23802    0.15036  -1.583 0.120291    
fGRAZE5                   -1.00394    0.24264  -4.138 0.000148 ***
center(log(AREA)):fGRAZE2  0.12339    0.06617   1.865 0.068608 .  
center(log(AREA)):fGRAZE3  0.15174    0.06501   2.334 0.024019 *  
center(log(AREA)):fGRAZE4  0.43933    0.22471   1.955 0.056659 .  
center(log(AREA)):fGRAZE5  0.31603    0.18885   1.673 0.101027    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 32.33763)

    Null deviance: 6337.9  on 55  degrees of freedom
Residual deviance: 1487.5  on 46  degrees of freedom
AIC: 364.57

Number of Fisher Scoring iterations: 7

Conclusions:

  • now that we have interactions in the model, we need to consider these interactions prior to any attempts to interpret the main effects.
  • that said, as this is the first time in the series that we have considered interactions, it is worth going through each of the coefficients to consider their interpretations. Keep in mind, that all parameters are still on a log scale.
  • in the presence of a categorical predictor, many of the main effects (as well as the intercept), only directly pertain to the first (reference) level of this categorical predictor (in this case level 1 of Grazing).
  • the intercept of 3.24 indicates that the for Grazing level 1, when all of the other predictors are at their averages, the expected number of birds (on a log scale) is 0.1
  • for Grazing level 1, the partial slope associated with (centred log transformed) Area is 0.05.
  • the difference between the intercept of the first Grazing level and the second Grazing level is -0.16, although this is not considered a significant difference.
  • the difference between the intercept and Grazing levels 3, 4 and 5 are -0.15, -0.24 and -1 respectively (the later of which is considered a significant difference).
  • the four interaction terms compare the partial slopes of Area between Grazing level 1 and each of levels 2, 3, 4 and 5. Hence the partial slope for Area associated with Grazing level 2 is 0.12 units greater (though not significant) than that associated with Grazing level 1.
  • the presence of an interaction effect indicates that we cannot simply conclude that the rate of change (partial slope) associated with Area is a specific value as there is some evidence that the slope is to some degree dependent on Grazing level.
loyn_glm1b |>
  estimate_slopes(trend = "AREA", at =  "fGRAZE", length =  100) |>
  plot()

We do know that the partial slope associated with Area for Grazing level 3 is different to that associated with Grazing level 1, yet we don’t directly know what the slope associated with Grazing level 1 is. Furthermore, we do not know how it compares to the slopes associated with other Grazing levels. We can use the ‘emtrends()’ function to explore this further.

loyn_glm1b |> emtrends(pairwise~fGRAZE, var='log(AREA)')
$emtrends
 fGRAZE log(AREA).trend     SE df lower.CL upper.CL
 1               0.0547 0.0310 46 -0.00759    0.117
 2               0.1781 0.0585 46  0.06043    0.296
 3               0.2065 0.0572 46  0.09143    0.322
 4               0.4941 0.2226 46  0.04609    0.942
 5               0.3708 0.1863 46 -0.00421    0.746

Confidence level used: 0.95 

$contrasts
 contrast          estimate     SE df t.ratio p.value
 fGRAZE1 - fGRAZE2  -0.1234 0.0662 46  -1.865  0.3505
 fGRAZE1 - fGRAZE3  -0.1517 0.0650 46  -2.334  0.1528
 fGRAZE1 - fGRAZE4  -0.4393 0.2247 46  -1.955  0.3039
 fGRAZE1 - fGRAZE5  -0.3160 0.1888 46  -1.673  0.4600
 fGRAZE2 - fGRAZE3  -0.0284 0.0818 46  -0.347  0.9968
 fGRAZE2 - fGRAZE4  -0.3159 0.2301 46  -1.373  0.6477
 fGRAZE2 - fGRAZE5  -0.1926 0.1953 46  -0.987  0.8600
 fGRAZE3 - fGRAZE4  -0.2876 0.2298 46  -1.252  0.7215
 fGRAZE3 - fGRAZE5  -0.1643 0.1949 46  -0.843  0.9156
 fGRAZE4 - fGRAZE5   0.1233 0.2902 46   0.425  0.9930

P value adjustment: tukey method for comparing a family of 5 estimates 
## loyn_glm1b |> emtrends(~fGRAZE, var='log(AREA)') |> pairs()
loyn_glm1b |> emtrends(pairwise~fGRAZE, var='AREA')
$emtrends
 fGRAZE AREA.trend       SE df  lower.CL upper.CL
 1         0.00078 0.000441 46 -0.000108  0.00167
 2         0.00254 0.000834 46  0.000861  0.00422
 3         0.00294 0.000815 46  0.001303  0.00458
 4         0.00704 0.003173 46  0.000657  0.01343
 5         0.00529 0.002656 46 -0.000060  0.01063

Confidence level used: 0.95 

$contrasts
 contrast           estimate       SE df t.ratio p.value
 fGRAZE1 - fGRAZE2 -0.001759 0.000943 46  -1.865  0.3505
 fGRAZE1 - fGRAZE3 -0.002163 0.000927 46  -2.334  0.1528
 fGRAZE1 - fGRAZE4 -0.006263 0.003203 46  -1.955  0.3039
 fGRAZE1 - fGRAZE5 -0.004505 0.002692 46  -1.673  0.4600
 fGRAZE2 - fGRAZE3 -0.000404 0.001166 46  -0.347  0.9968
 fGRAZE2 - fGRAZE4 -0.004504 0.003280 46  -1.373  0.6477
 fGRAZE2 - fGRAZE5 -0.002746 0.002783 46  -0.987  0.8600
 fGRAZE3 - fGRAZE4 -0.004100 0.003276 46  -1.252  0.7215
 fGRAZE3 - fGRAZE5 -0.002342 0.002778 46  -0.843  0.9156
 fGRAZE4 - fGRAZE5  0.001758 0.004137 46   0.425  0.9930

P value adjustment: tukey method for comparing a family of 5 estimates 

Conclusions:

  • the first part of the output displays the partial slopes (for Area) associated with each of the five Grazing levels. We see that the first of those, is the same partial slope that was reported in the summary from the model itself.
    • note that each of the slopes has a value greater than 1 - suggesting positive relationships. Furthermore, in all but the first partial slope, the confidence intervals suggest significant relationships.
  • the second part of the output displays the differences in partial slopes between each pair of Grazing levels.
    • evidently, non of the pairwise slope differences are significant - although it is worth keeping in mind that each individual comparison has sacrificed power. For example, the summary table indicated that there was inferential evidence of a difference in partial slopes between Grazing level 1 and 3, however when compared in the context of Tukey’s contrasts, this is not significant.
loyn_glm1b |> emmeans(~fGRAZE, type = 'link') |> regrid() |> pairs() |> summary(infer = TRUE)
NOTE: Results may be misleading due to involvement in interactions
 contrast          estimate    SE df lower.CL upper.CL t.ratio p.value
 fGRAZE1 - fGRAZE2    -3.08  4.02 46    -14.5     8.33  -0.765  0.9392
 fGRAZE1 - fGRAZE3    -5.13  4.49 46    -17.9     7.62  -1.142  0.7835
 fGRAZE1 - fGRAZE4   -28.04 23.23 46    -94.0    37.90  -1.207  0.7473
 fGRAZE1 - fGRAZE5     8.34  9.85 46    -19.6    36.30   0.846  0.9146
 fGRAZE2 - fGRAZE3    -2.05  5.60 46    -17.9    13.83  -0.367  0.9960
 fGRAZE2 - fGRAZE4   -24.96 23.47 46    -91.6    41.66  -1.064  0.8239
 fGRAZE2 - fGRAZE5    11.41 10.40 46    -18.1    40.94   1.097  0.8071
 fGRAZE3 - fGRAZE4   -22.91 23.55 46    -89.8    43.95  -0.973  0.8660
 fGRAZE3 - fGRAZE5    13.47 10.59 46    -16.6    43.54   1.271  0.7098
 fGRAZE4 - fGRAZE5    36.37 25.13 46    -35.0   107.72   1.447  0.6010

Confidence level used: 0.95 
Conf-level adjustment: tukey method for comparing a family of 5 estimates 
P value adjustment: tukey method for comparing a family of 5 estimates 

11 Summary figures

Since our summary figure will feature both modelled predictors, we might as well overlay the trends onto the raw data.

## loyn_grid <- loyn_glm1b |>
##     insight::get_data() |>
##     modelr::data_grid(AREA=modelr::seq_range(AREA, n=10)) |>
##     as.list()

## ref_grid(loyn_glm1b, cov.reduce=function(x) seq_range(x, n=10)) |>
##     emmeans(~AREA|fGRAZE)



## Using emmeans
loyn_grid <- with(loyn,  list(fGRAZE = levels(fGRAZE),
                              AREA = seq(min(AREA),  max(AREA),  len=100)))
## OR
loyn_grid <- with(loyn,  list(fGRAZE = levels(fGRAZE),
                            AREA = seq_range(AREA,  n=100)))
newdata <- loyn_glm1b |>
    emmeans(~AREA|fGRAZE,  at=loyn_grid,  type='response') |>
  as.data.frame()
head(newdata)
     AREA fGRAZE response       SE df lower.CL upper.CL
  0.10000 1      20.07867 4.335905 46 13.00042 31.01077
 17.98788 1      26.68043 1.944988 46 23.03907 30.89731
 35.87576 1      27.70816 1.689080 46 24.50854 31.32549
 53.76364 1      28.32868 1.603612 46 25.27789 31.74768
 71.65152 1      28.77767 1.584518 46 25.75859 32.15060
 89.53939 1      29.13095 1.597348 46 26.08675 32.53040

Confidence level used: 0.95 
Intervals are back-transformed from the log scale 
## for key_glyph see ?draw_key
ggplot(newdata, aes(y=response, x=AREA, color=fGRAZE, fill=fGRAZE)) +
  geom_point(data=loyn,  aes(y=ABUND,  color=fGRAZE)) +
  ## geom_ribbon(aes(ymin=asymp.LCL, ymax=asymp.UCL), color=NA, alpha=0.3) +
  geom_ribbon(aes(ymin=lower.CL, ymax=upper.CL), color=NA, alpha=0.3) +
  geom_line(key_glyph = "path") +
  scale_x_log10(labels=scales::comma)+
  scale_y_log10('Abundance', breaks=as.vector(c(1,2,5,10) %o% 10^(0:2))) +
  theme_classic()

If we want to limit the range of predictions within each Grazing level…

loyn_grid <- loyn |>
    filter(fGRAZE==1) |>
    with(list(fGRAZE='1', AREA=seq_range(AREA,  n=100)))
newdata.1 = emmeans(loyn_glm1b,  ~AREA|fGRAZE,  at=loyn_grid,  type='response') |>
  as.data.frame()

loyn_grid <- loyn |>
    filter(fGRAZE==2) |>
    with(list(fGRAZE='2', AREA=seq_range(AREA,  n=100)))
newdata.2 = emmeans(loyn_glm1b,  ~AREA|fGRAZE,  at=loyn_grid,  type='response') |>
  as.data.frame()

loyn_grid <- loyn |>
    filter(fGRAZE==3) |>
    with(list(fGRAZE='3', AREA=seq_range(AREA,  n=100)))
newdata.3 = emmeans(loyn_glm1b,  ~AREA|fGRAZE,  at=loyn_grid,  type='response') |>
  as.data.frame()

loyn_grid <- loyn |>
    filter(fGRAZE==4) |>
    with(list(fGRAZE='4', AREA=seq_range(AREA,  n=100)))
newdata.4 = emmeans(loyn_glm1b,  ~AREA|fGRAZE,  at=loyn_grid,  type='response') |>
  as.data.frame()

loyn_grid <- loyn |>
    filter(fGRAZE==5) |>
    with(list(fGRAZE='5', AREA=seq_range(AREA,  n=100)))
newdata.5 = emmeans(loyn_glm1b,  ~AREA|fGRAZE,  at=loyn_grid,  type='response') |>
  as.data.frame()

newdata.1 |>
    bind_rows(newdata.2) |>
    bind_rows(newdata.3) |>
    bind_rows(newdata.4) |>
    bind_rows(newdata.5) |>
    ggplot(aes(y=response, x=AREA, color=fGRAZE, fill=fGRAZE)) +
    geom_ribbon(aes(ymin=lower.CL, ymax=upper.CL), color=NA, alpha=0.3) +
    geom_line() +
    scale_x_log10(labels=scales::comma)+
    scale_y_log10('Abundance') +
  theme_classic() +
  scale_color_viridis_d() +
  scale_fill_viridis_d() +
  geom_point(data=loyn,  aes(y=ABUND,  color=fGRAZE))

loyn |>
    group_by(fGRAZE) |>
    nest() |>
    mutate(Grid = map2(.x=data, .y=fGRAZE, ~list(fGRAZE=unique(.y), AREA=seq_range(.x$AREA, n=100))),
           Pred = map(.x=Grid, ~emmeans(loyn_glm1b, ~AREA|fGRAZE, at=.x, type='response') |> as.data.frame())
           ) |>
    ungroup() |>
    dplyr::select(Pred) |>
    unnest(Pred) |>
    ggplot(aes(y=response, x=AREA, color=fGRAZE, fill=fGRAZE)) +
    geom_ribbon(aes(ymin=lower.CL, ymax=upper.CL), color=NA, alpha=0.3) +
    geom_line() +
    scale_x_log10(labels=scales::comma)+
    scale_y_log10('Abundance') +
    theme_classic() +
    scale_color_viridis_d() +
    scale_fill_viridis_d() +
    geom_point(data=loyn,  aes(y=ABUND,  color=fGRAZE))

loyn_grid = with(loyn,  list(fGRAZE=levels(fGRAZE),
                             AREA=as.vector(seq(1, 10, len=10) %o% 10^(-1:3)) |> unique()))

newdata = emmeans(loyn_glm1b, ~AREA|fGRAZE,
                  at=loyn_grid,
                  type='response') |>
    as.data.frame()
head(newdata)
 AREA fGRAZE response       SE df lower.CL upper.CL
  0.1 1      20.07867 4.335905 46 13.00042 31.01077
  0.2 1      20.85528 4.072151 46 14.07745 30.89642
  0.3 1      21.32342 3.907277 46 14.74591 30.83488
  0.4 1      21.66193 3.785579 46 15.23794 30.79414
  0.5 1      21.92819 3.688519 46 15.62990 30.76447
  0.6 1      22.14817 3.607515 46 15.95692 30.74162

Confidence level used: 0.95 
Intervals are back-transformed from the log scale 
loyn.limits = loyn |>
  group_by(fGRAZE) |>
  summarise(Min=min(AREA),  Max=max(AREA))
newdata1 = newdata |>
  full_join(loyn.limits) |>
  filter(AREA>=Min,  AREA<=Max)
Joining with `by = join_by(fGRAZE)`
head(newdata1)
 AREA fGRAZE response       SE df lower.CL upper.CL Min  Max
    2 1      23.65729 3.037172 46 18.26984 30.63340   2 1771
    3 1      24.18833 2.833642 46 19.10712 30.62079   2 1771
    4 1      24.57231 2.687085 46 19.71741 30.62262   2 1771
    5 1      24.87435 2.572803 46 20.19913 30.63169   2 1771
    6 1      25.12389 2.479443 46 20.59749 30.64499   2 1771
    7 1      25.33682 2.400805 46 20.93716 30.66101   2 1771

Confidence level used: 0.95 
Intervals are back-transformed from the log scale 
ggplot(newdata1, aes(y=response, x=AREA, color=fGRAZE, fill=fGRAZE)) +
    geom_ribbon(aes(ymin=lower.CL, ymax=upper.CL), color=NA, alpha=0.3) +
    geom_line() +
    scale_x_log10(labels=scales::comma)+
    scale_y_log10('Abundance') +
  theme_classic() +
  scale_color_viridis_d() +
  scale_fill_viridis_d() +
  geom_point(data=loyn,  aes(y=ABUND,  color=fGRAZE))

loyn_glm1b |>
  estimate_expectation() |>
  plot() 

loyn_glm1b |>
  estimate_expectation(at = "AREA = [range]") |>
  plot() +
  scale_x_log10("Patch area (Ha²)", labels = scales::label_number()) +
  scale_y_log10("Bird abundance") +
  scale_fill_viridis_d("Grazing\nintensity") +
  scale_colour_viridis_d("Grazing\nintensity") +
  theme_classic() +
  theme(legend.position = c(1, 0),
    legend.justification = c(1, 0)) +
  ggtitle("")

## If we wanted all lines to have the same range..
loyn_glm1b |>
  estimate_expectation(preserve_range = FALSE, data =  "grid") |>
  plot() +
  scale_x_log10("Patch area (Ha²)", labels = scales::label_number()) +
  scale_y_log10("Bird abundance")
Warning in self$trans$transform(x): NaNs produced
Warning: Transformation introduced infinite values in continuous y-axis

12 References

Loyn, R. H. 1987. “Nature Conservation: The Role of Remnants of Native Vegetation.” In, edited by D. A. Saunders, G. W. Arnold, A. A. Burbridge, and A. J. M. Hopkins. Chipping Norton, NSW: Surrey Beatty & Sons.